# Source the project's load.R script with package startup messages silenced
# (presumably this brings the cola development code into scope -- TODO confirm).
suppressPackageStartupMessages(source("/home/guz/project/development/cola/load.R"))

# Read the data table (first column becomes the row names, column names are
# kept verbatim) and convert it to a matrix in one step.
data <- as.matrix(read.table(
    "/icgc/dkfzlsdf/analysis/B080/guz/subgroup_test/unifiedScaled.txt",
    header = TRUE, row.names = 1, check.names = FALSE
))

# Read the tab-separated subtype table, keeping strings as plain characters.
subtype <- read.table(
    "/icgc/dkfzlsdf/analysis/B080/guz/subgroup_test/TCGA_unified_CORE_ClaNC840.txt",
    sep = "\t", header = TRUE, check.names = FALSE, stringsAsFactors = FALSE
)
# Flatten the first row (without its first two columns) into a vector whose
# names are the corresponding column names.
subtype <- setNames(unlist(subtype[1, -(1:2)]), colnames(subtype)[-(1:2)])

# Keep only the columns of `data` named in `subtype`, in the same order.
data <- data[, names(subtype)]
dim(data)
## [1] 11861 173
table(subtype)
## subtype
## Classical Mesenchymal Neural Proneural
## 38 56 26 53
Get all supported top methods and partition methods:
# Print the names of the available top-value methods (recorded output below).
ALL_TOP_VALUE_METHOD()
## [1] "sd" "vc" "MAD" "AAC"
# Print the names of the available partition methods (recorded output below).
ALL_PARTITION_METHOD()
## [1] "hclust" "kmeans" "skmeans" "Mclust" "clara" "pam" "cclust"
Run clustering for all combinations of methods in batch:
# Batch run on `data`: top 2000/4000/6000 rows, k from 2 to 6, with the
# subtype labels passed as `known` and mc.cores = 4.
res_list <- run_all(
    data,
    top_n = c(2000, 4000, 6000),
    k = 2:6,
    known = subtype,
    mc.cores = 4
)
# NOTE(review): the next line replaces the run_all() result with an object
# loaded from disk -- confirm whether run_all() is meant to be executed here.
res_list <- readRDS("/icgc/dkfzlsdf/analysis/B080/guz/subgroup_test/TCGA_subgroup_p0.8.rds")
# Print the result object (recorded output below).
res_list
## Top rows are extracted by 'sd, vc, MAD, AAC' methods.
## Subgroups are detected by 'hclust, kmeans, skmeans, Mclust, clara, pam, cclust' method.
## Number of partitions are tried for k = 2, 3, 4, 5, 6
Collect all plots for a given k:
# Collect plots over everything in res_list, using plot_ecdf as the plotting
# function (no k is passed for this one).
collect_plots(res_list, fun = plot_ecdf)
# For k = 3, collect the consensus heatmaps, then the membership heatmaps,
# then the signature plots.
collect_plots(res_list, k = 3, fun = consensus_heatmap)
collect_plots(res_list, k = 3, fun = membership_heatmap)
collect_plots(res_list, k = 3, fun = get_signatures)
## 119/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 7802 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 165/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 8401 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 171/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 8356 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 162/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 8581 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 158/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 8781 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 157/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 8631 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 154/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 8446 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 81/173 samples (in 2 classes) remain after filtering by silhouette (>= 0.5).
## 5287 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 145/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 9736 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 140/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 9418 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 152/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 9627 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 166/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 8963 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 164/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 8974 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 130/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 9702 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 58/173 samples (in 2 classes) remain after filtering by silhouette (>= 0.5).
## 7606 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 122/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 9444 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 126/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 9603 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 145/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 9535 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 161/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 8932 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 160/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 8931 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 85/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 9253 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 118/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 9449 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 127/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 9409 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 161/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 9443 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 153/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 9594 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 164/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 8811 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 161/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 8729 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 105/173 samples (in 2 classes) remain after filtering by silhouette (>= 0.5).
## 7696 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
Overlap of top rows in different top methods:
# Draw the three overlap plots in a 1-row x 3-column layout (assuming
# top_rows_overlap() draws with base graphics, as the par() call implies).
# The previous graphics parameters are saved and restored afterwards so that
# later plots are not squeezed into the leftover three-panel layout.
old_par <- par(mfrow = c(1, 3))
top_rows_overlap(res_list, top_n = 2000)
top_rows_overlap(res_list, top_n = 4000)
top_rows_overlap(res_list, top_n = 6000)
par(old_par)
Also visualize the correspondence of rankings between different scoring methods:
# Same overlap call for the top 2000 rows, but with the "correspondance" plot
# type (the spelling is the argument value the function is called with).
top_rows_overlap(res_list, top_n = 2000, type = "correspondance")
Heatmaps for the top rows:
# Draw heatmaps of the top 2000 rows from res_list.
top_rows_heatmap(res_list, top_n = 2000)
Get clustering in a specified combination of top method and partition method:
# Pull out the single result for the AAC top-value method combined with the
# skmeans partition method.
res <- get_single_run(
    res_list,
    top_method = "AAC",
    partition_method = "skmeans"
)
# Print the extracted result (recorded output below).
res
## top rows are extracted by 'AAC' method.
## Subgroups are detected by 'skmeans' method.
## Number of partitionings are tried for k = 2, 3, 4, 5, 6
Collect all plots
# Collect all plots for the single result `res`; the messages recorded after
# this call cover 2 to 6 classes.
collect_plots(res)
## 173/173 samples (in 2 classes) remain after filtering by silhouette (>= 0.5).
## 7618 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 161/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 9443 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 132/173 samples (in 4 classes) remain after filtering by silhouette (>= 0.5).
## 9678 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 145/173 samples (in 5 classes) remain after filtering by silhouette (>= 0.5).
## 10861 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
## 130/173 samples (in 6 classes) remain after filtering by silhouette (>= 0.5).
## 10447 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
Individual plots:
# Plot for choosing k on the single result (no k argument is needed here).
select_k(res)
# The same three views as before, now for `res` alone at k = 3:
# consensus heatmap, membership heatmap, and signatures.
consensus_heatmap(res, k = 3)
membership_heatmap(res, k = 3)
get_signatures(res, k = 3)
## 161/173 samples (in 3 classes) remain after filtering by silhouette (>= 0.5).
## 9443 signatures under fdr < 0.05
## Only take top 5000 signatures with highest fdr
Get classifications
# Class assignments of `res` at k = 3; per the recorded output the result has
# columns p1..p3, silhouette and class, with one row per sample.
class_df <- get_class(res, k = 3)
# Show the first rows.
head(class_df)
## p1 p2 p3 silhouette class
## TCGA-02-0003-01A-01 0 1 0 0.977354 2
## TCGA-02-0010-01A-01 0 1 0 0.977354 2
## TCGA-02-0011-01B-01 0 1 0 0.977354 2
## TCGA-02-0014-01A-01 0 1 0 0.977354 2
## TCGA-02-0024-01B-01 0 1 0 0.977354 2
## TCGA-02-0026-01B-01 0 1 0 0.977354 2
MDS or t-SNE plots:
# Dimension-reduction plot for k = 3 with the default method
# (presumably MDS, going by the caption above -- TODO confirm).
dimension_reduction(res, k = 3)
# Same plot using the "tsne" method.
dimension_reduction(res, k = 3, method = "tsne")
Consistency of classes.
# Compare the class assignments across everything in res_list at k = 3.
collect_classes(res_list, k = 3)
# Same for the single result `res` (no k is passed here).
collect_classes(res)